library(stringr)
library(tidyverse)
library(bigrquery)
library(MuMIn)
response <- try(system('~/google-cloud-sdk/bin/gcloud projects list --quiet', intern = T))
projectid <- strsplit(response[2], " ")[[1]][1]
options(na.action = "na.fail") 
source("./dredge_functions.R")
dredge_and_subset <- function(data) {
  model <- lm(
    response ~ foraging_niche + trophic_niche + is_nocturnal + pc1 + pc2 + pc3 + pc4 + total_species,
    data=data
  )
  dredge_result <- dredge(model)
  summary(model.avg(dredge_result, subset = delta < 10))
}
load_niche_data <- function() {
  filename <- 'species_analysis_cache.csv'
  
  if (!file.exists(filename)) {
    
    sql <- "
        SELECT 
          niche_name,
          foraging_niche, 
          trophic_niche, 
          is_noctural, 
          total_species,
          merlin_10_perc_ratio, 
          merlin_20_perc_ratio, 
          birdlife_10_perc_ratio, 
          birdlife_20_perc_ratio, 
          either_10_perc_ratio, 
          either_20_perc_ratio, 
          both_10_perc_ratio, 
          both_20_perc_ratio, 
          body_morphspace.pc1.mean AS pc1, 
          body_morphspace.pc2.mean AS pc2, 
          body_morphspace.pc3.mean AS pc3, 
          body_morphspace.pc4.mean AS pc4
        FROM `endless-matter-297214.model2.niche_analysis_model` LIMIT 1000 
    "
  
    tb <- bq_project_query(projectid, sql)

    data <- bq_table_download(tb)
    write_csv(data, filename)
  }
  
  data <- read_csv(filename)
  
  data[is.na(data$foraging_niche),]$foraging_niche <- 'Omnivore'
  
  data$foraging_niche = as.factor(data$foraging_niche)
  data$trophic_niche = as.factor(data$trophic_niche)
  data$is_nocturnal = as.factor(data$is_noctural)
  
  data
}

data_for_response <- function(column_name_for_response) {
  data <- load_niche_data()
  names(data)[names(data) == column_name_for_response] <- "response"
  
  data[,c("niche_name", "response", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}
0.25 Percentile - 10% of species
merlin_10_data <- data_for_response('merlin_10_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical(),
  total_species = col_double(),
  merlin_10_perc_ratio = col_double(),
  merlin_20_perc_ratio = col_double(),
  birdlife_10_perc_ratio = col_double(),
  birdlife_20_perc_ratio = col_double(),
  either_10_perc_ratio = col_double(),
  either_20_perc_ratio = col_double(),
  both_10_perc_ratio = col_double(),
  both_20_perc_ratio = col_double(),
  pc1 = col_double(),
  pc2 = col_double(),
  pc3 = col_double(),
  pc4 = col_double()
)
merlin_10_data
merlin_10_result <- dredge_and_subset(merlin_10_data)
Fixed term is "(Intercept)"
merlin_10_summ <- model_summary('merlin_10', 'species', merlin_10_result)
merlin_10_summ
birdlife_10_data <- data_for_response('birdlife_10_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical(),
  total_species = col_double(),
  merlin_10_perc_ratio = col_double(),
  merlin_20_perc_ratio = col_double(),
  birdlife_10_perc_ratio = col_double(),
  birdlife_20_perc_ratio = col_double(),
  either_10_perc_ratio = col_double(),
  either_20_perc_ratio = col_double(),
  both_10_perc_ratio = col_double(),
  both_20_perc_ratio = col_double(),
  pc1 = col_double(),
  pc2 = col_double(),
  pc3 = col_double(),
  pc4 = col_double()
)
birdlife_10_result <- dredge_and_subset(birdlife_10_data)
Fixed term is "(Intercept)"
birdlife_10_summ <- model_summary('birdlife_10', 'species', birdlife_10_result)
birdlife_10_summ
both_10_data <- data_for_response('both_10_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical(),
  total_species = col_double(),
  merlin_10_perc_ratio = col_double(),
  merlin_20_perc_ratio = col_double(),
  birdlife_10_perc_ratio = col_double(),
  birdlife_20_perc_ratio = col_double(),
  either_10_perc_ratio = col_double(),
  either_20_perc_ratio = col_double(),
  both_10_perc_ratio = col_double(),
  both_20_perc_ratio = col_double(),
  pc1 = col_double(),
  pc2 = col_double(),
  pc3 = col_double(),
  pc4 = col_double()
)
both_10_result <- dredge_and_subset(both_10_data)
Fixed term is "(Intercept)"
both_10_summ <- model_summary('both_10', 'species', both_10_result)
both_10_summ
either_10_data <- data_for_response('either_10_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical(),
  total_species = col_double(),
  merlin_10_perc_ratio = col_double(),
  merlin_20_perc_ratio = col_double(),
  birdlife_10_perc_ratio = col_double(),
  birdlife_20_perc_ratio = col_double(),
  either_10_perc_ratio = col_double(),
  either_20_perc_ratio = col_double(),
  both_10_perc_ratio = col_double(),
  both_20_perc_ratio = col_double(),
  pc1 = col_double(),
  pc2 = col_double(),
  pc3 = col_double(),
  pc4 = col_double()
)
either_10_result <- dredge_and_subset(either_10_data)
Fixed term is "(Intercept)"
either_10_summ <- model_summary('either_10', 'species', either_10_result)
either_10_summ
Full result for 10% of species
all_species_results <- full_join(full_join(merlin_10_summ, birdlife_10_summ), full_join(both_10_summ, either_10_summ))
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
write_csv(all_species_results, "species_analysis_result_10_perc.csv")
0.75 Percentile - 20% of species
merlin_20_data <- data_for_response('merlin_20_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical(),
  total_species = col_double(),
  merlin_10_perc_ratio = col_double(),
  merlin_20_perc_ratio = col_double(),
  birdlife_10_perc_ratio = col_double(),
  birdlife_20_perc_ratio = col_double(),
  either_10_perc_ratio = col_double(),
  either_20_perc_ratio = col_double(),
  both_10_perc_ratio = col_double(),
  both_20_perc_ratio = col_double(),
  pc1 = col_double(),
  pc2 = col_double(),
  pc3 = col_double(),
  pc4 = col_double()
)
merlin_20_result <- dredge_and_subset(merlin_20_data)
Fixed term is "(Intercept)"
merlin_20_summ <- model_summary('merlin_20', 'species', merlin_20_result)
merlin_20_summ
birdlife_20_data <- data_for_response('birdlife_20_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical(),
  total_species = col_double(),
  merlin_10_perc_ratio = col_double(),
  merlin_20_perc_ratio = col_double(),
  birdlife_10_perc_ratio = col_double(),
  birdlife_20_perc_ratio = col_double(),
  either_10_perc_ratio = col_double(),
  either_20_perc_ratio = col_double(),
  both_10_perc_ratio = col_double(),
  both_20_perc_ratio = col_double(),
  pc1 = col_double(),
  pc2 = col_double(),
  pc3 = col_double(),
  pc4 = col_double()
)
birdlife_20_result <- dredge_and_subset(birdlife_20_data)
Fixed term is "(Intercept)"
birdlife_20_summ <- model_summary('birdlife_20', 'species', birdlife_20_result)
birdlife_20_summ
both_20_data <- data_for_response('both_20_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical(),
  total_species = col_double(),
  merlin_10_perc_ratio = col_double(),
  merlin_20_perc_ratio = col_double(),
  birdlife_10_perc_ratio = col_double(),
  birdlife_20_perc_ratio = col_double(),
  either_10_perc_ratio = col_double(),
  either_20_perc_ratio = col_double(),
  both_10_perc_ratio = col_double(),
  both_20_perc_ratio = col_double(),
  pc1 = col_double(),
  pc2 = col_double(),
  pc3 = col_double(),
  pc4 = col_double()
)
both_20_result <- dredge_and_subset(both_20_data)
Fixed term is "(Intercept)"
both_20_summ <- model_summary('both_20', 'species', both_20_result)
both_20_summ
either_20_data <- data_for_response('either_20_perc_ratio')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  niche_name = col_character(),
  foraging_niche = col_character(),
  trophic_niche = col_character(),
  is_noctural = col_logical(),
  total_species = col_double(),
  merlin_10_perc_ratio = col_double(),
  merlin_20_perc_ratio = col_double(),
  birdlife_10_perc_ratio = col_double(),
  birdlife_20_perc_ratio = col_double(),
  either_10_perc_ratio = col_double(),
  either_20_perc_ratio = col_double(),
  both_10_perc_ratio = col_double(),
  both_20_perc_ratio = col_double(),
  pc1 = col_double(),
  pc2 = col_double(),
  pc3 = col_double(),
  pc4 = col_double()
)
either_20_result <- dredge_and_subset(either_20_data)
Fixed term is "(Intercept)"
either_20_summ <- model_summary('either_20', 'species', either_20_result)
either_20_summ
Full result for 10% of species
all_species_results <- full_join(full_join(merlin_20_summ, birdlife_20_summ), full_join(both_20_summ, either_20_summ))
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
write_csv(all_species_results, "species_analysis_result_20_perc.csv")
Comparing the 2 percentiles
ggplot(merlin_10_data, aes(x = response)) + geom_bar(width = 0.1)
Warning: position_stack requires non-overlapping x intervals

bind_sets <- function(first_percentile, second_percentile) {
  first_percentile$percentile <- "0.25"
  second_percentile$percentile <- "0.75"
  rbind(first_percentile, second_percentile)
}

merlin_species_data <- bind_sets(merlin_10_data,  merlin_20_data)
birdlife_species_data <- bind_sets(birdlife_10_data,  birdlife_20_data)
either_species_data <- bind_sets(either_10_data,  either_20_data)
both_species_data <- bind_sets(both_10_data,  both_20_data)
ggplot(merlin_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

merlin_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova)

Error: niche_name
               Df Sum Sq Mean Sq F value Pr(>F)  
trophic_niche   9  1.762  0.1958   1.679 0.0943 .
Residuals     258 30.084  0.1166                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.508  1.5077   59.65 2.29e-13 ***
Residuals  267  6.749  0.0253                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova_i)

Error: niche_name
               Df Sum Sq Mean Sq F value Pr(>F)  
trophic_niche   9  1.762  0.1958   1.679 0.0943 .
Residuals     258 30.084  0.1166                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
                          Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                 1  1.508  1.5077  69.821 4.05e-15 ***
trophic_niche:percentile   9  1.177  0.1308   6.057 1.03e-07 ***
Residuals                258  5.571  0.0216                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
pairwise.wilcox.test(merlin_diff_data$increase, merlin_diff_data$trophic_niche)
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
  cannot compute exact p-value with ties

    Pairwise comparisons using Wilcoxon rank sum test with continuity correction 

data:  merlin_diff_data$increase and merlin_diff_data$trophic_niche 

                      Aquatic predator Frugivore Granivore Herbivore aquatic Herbivore terrestrial Invertivore Nectarivore Omnivore Scavenger
Frugivore             0.5331           -         -         -                 -                     -           -           -        -        
Granivore             1.0000           1.0000    -         -                 -                     -           -           -        -        
Herbivore aquatic     1.0000           0.1167    1.0000    -                 -                     -           -           -        -        
Herbivore terrestrial 1.0000           1.0000    1.0000    1.0000            -                     -           -           -        -        
Invertivore           0.0076           1.0000    1.0000    0.0051            1.0000                -           -           -        -        
Nectarivore           1.0000           1.0000    1.0000    1.0000            1.0000                1.0000      -           -        -        
Omnivore              0.2684           1.0000    1.0000    0.0679            1.0000                1.0000      1.0000      -        -        
Scavenger             1.0000           1.0000    1.0000    1.0000            1.0000                1.0000      1.0000      1.0000   -        
Vertivore             1.0000           1.0000    1.0000    0.5929            1.0000                1.0000      1.0000      1.0000   1.0000   

P value adjustment method: holm 
library(multcomp)
merlin_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=merlin_diff_data)
summary(merlin_increase_tropic_niche_anova)
               Df Sum Sq Mean Sq F value   Pr(>F)    
trophic_niche   9  3.941  0.4379   3.689 0.000229 ***
Residuals     258 30.629  0.1187                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_increase_tropic_niche_tukey <- cld(glht(merlin_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
merlin_increase_tropic_niche_tukey
     Aquatic predator             Frugivore             Granivore     Herbivore aquatic Herbivore terrestrial           Invertivore           Nectarivore              Omnivore 
                 "bc"                  "ab"                  "ac"                   "c"                  "ac"                   "a"                  "ac"                  "ab" 
            Scavenger             Vertivore 
                 "ac"                  "ac" 
plot(merlin_increase_tropic_niche_tukey)

with.tukey.label.as.group <- function(tukey, dataset, join_by) {
  labels <- data.frame(tukey$mcletters$Letters)
  labels$category <- rownames(labels)
  colnames(labels) <- c("group", "category")
  
  
  left_join(dataset, labels, by = join_by)
}

ggplot(birdlife_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

birdlife_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova)

Error: niche_name
               Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche   9  1.696  0.1885   1.605  0.114
Residuals     258 30.292  0.1174               

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.327  1.3274   60.18 1.84e-13 ***
Residuals  267  5.890  0.0221                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova_i)

Error: niche_name
               Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche   9  1.696  0.1885   1.605  0.114
Residuals     258 30.292  0.1174               

Error: Within
                          Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                 1  1.327  1.3274  64.018 4.19e-14 ***
trophic_niche:percentile   9  0.540  0.0600   2.893  0.00282 ** 
Residuals                258  5.350  0.0207                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=birdlife_diff_data)
summary(birdlife_increase_tropic_niche_anova)
               Df Sum Sq Mean Sq F value Pr(>F)  
trophic_niche   9   2.21  0.2453   1.791 0.0703 .
Residuals     258  35.35  0.1370                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_increase_tropic_niche_tukey <- cld(glht(birdlife_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
birdlife_increase_tropic_niche_tukey
     Aquatic predator             Frugivore             Granivore     Herbivore aquatic Herbivore terrestrial           Invertivore           Nectarivore              Omnivore 
                  "b"                  "ab"                  "ab"                  "ab"                  "ab"                   "a"                  "ab"                  "ab" 
            Scavenger             Vertivore 
                 "ab"                  "ab" 

ggplot(either_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

ggplot(both_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

nrow(merlin_10_data[merlin_10_data$response > 0,])
[1] 96
nrow(merlin_20_data[merlin_20_data$response > 0,])
[1] 128
nrow(birdlife_10_data[birdlife_10_data$response > 0,])
[1] 87
nrow(birdlife_20_data[birdlife_20_data$response > 0,])
[1] 124
nrow(either_10_data[either_10_data$response > 0,])
[1] 94
nrow(either_20_data[either_20_data$response > 0,])
[1] 123
nrow(both_10_data[both_10_data$response > 0,])
[1] 91
nrow(both_20_data[both_20_data$response > 0,])
[1] 128
ggplot(merlin_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()

interaction.plot(merlin_species_data$foraging_niche, merlin_species_data$percentile, merlin_species_data$response)

merlin_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova)

Error: niche_name
                Df Sum Sq Mean Sq F value Pr(>F)
foraging_niche  32  4.894  0.1529   1.333  0.118
Residuals      235 26.952  0.1147               

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.508  1.5077   59.65 2.29e-13 ***
Residuals  267  6.749  0.0253                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova_i)

Error: niche_name
                Df Sum Sq Mean Sq F value Pr(>F)
foraging_niche  32  4.894  0.1529   1.333  0.118
Residuals      235 26.952  0.1147               

Error: Within
                           Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                  1  1.508  1.5077  68.706 8.81e-15 ***
foraging_niche:percentile  32  1.591  0.0497   2.266 0.000275 ***
Residuals                 235  5.157  0.0219                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_increase_foraging_niche_anova <-aov(increase~foraging_niche, data=merlin_diff_data)
summary(merlin_increase_foraging_niche_anova)
                Df Sum Sq Mean Sq F value Pr(>F)  
foraging_niche  32  5.804  0.1814   1.482 0.0533 .
Residuals      235 28.766  0.1224                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_increase_foraging_niche_tukey <- cld(glht(merlin_increase_foraging_niche_anova, linfct=mcp(foraging_niche="Tukey")))
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
merlin_increase_foraging_niche_tukey
            Aquatic aerial               Aquatic dive             Aquatic ground              Aquatic perch             Aquatic plunge            Aquatic surface            Fugivore aerial 
                       "a"                        "a"                        "a"                        "a"                        "a"                        "a"                        "a" 
            Fugivore glean            Fugivore ground                 Generalist         Granivore arboreal           Granivore ground     Herbivore aquatic dive   Herbivore aquatic ground 
                       "a"                        "a"                        "a"                        "a"                        "a"                        "a"                        "a" 
 Herbivore aquatic surface         Herbivore arboreal           Herbivore ground         Invertivore aerial           Invertivore bark Invertivore glean arboreal         Invertivore ground 
                       "a"                        "a"                        "a"                        "a"                        "a"                        "a"                        "a" 
     Invertivore sally air   Invertivore sally ground  Invertivore sally surface         Nectarivore aerial          Nectarivore glean                   Omnivore           Scavenger ground 
                       "a"                        "a"                        "a"                        "a"                        "a"                        "a"                        "a" 
          Vertivore aerial   Vertivore air to surface         Vertivore arboreal           Vertivore ground            Vertivore perch 
                       "a"                        "a"                        "a"                        "a"                        "a" 

ggplot(birdlife_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()

birdlife_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova)

Error: niche_name
                Df Sum Sq Mean Sq F value Pr(>F)  
foraging_niche  32  5.519  0.1725   1.531 0.0401 *
Residuals      235 26.470  0.1126                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.327  1.3274   60.18 1.84e-13 ***
Residuals  267  5.890  0.0221                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova_i)

Error: niche_name
                Df Sum Sq Mean Sq F value Pr(>F)  
foraging_niche  32  5.519  0.1725   1.531 0.0401 *
Residuals      235 26.470  0.1126                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
                           Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                  1  1.327  1.3274  67.870 1.22e-14 ***
foraging_niche:percentile  32  1.293  0.0404   2.066  0.00118 ** 
Residuals                 235  4.596  0.0196                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_increase_foraging_niche_anova <-aov(increase~foraging_niche, data=birdlife_diff_data)
summary(birdlife_increase_foraging_niche_anova)
                Df Sum Sq Mean Sq F value Pr(>F)
foraging_niche  32   4.85  0.1517    1.09  0.347
Residuals      235  32.71  0.1392               
birdlife_increase_foraging_niche_tukey <- cld(glht(birdlife_increase_foraging_niche_anova, linfct=mcp(foraging_niche="Tukey")))
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
  Completion with error > abseps
birdlife_increase_foraging_niche_tukey
            Aquatic aerial               Aquatic dive             Aquatic ground              Aquatic perch             Aquatic plunge            Aquatic surface            Fugivore aerial 
                       "a"                        "a"                        "a"                        "a"                        "a"                        "a"                        "a" 
            Fugivore glean            Fugivore ground                 Generalist         Granivore arboreal           Granivore ground     Herbivore aquatic dive   Herbivore aquatic ground 
                       "a"                        "a"                        "a"                        "a"                        "a"                        "a"                        "a" 
 Herbivore aquatic surface         Herbivore arboreal           Herbivore ground         Invertivore aerial           Invertivore bark Invertivore glean arboreal         Invertivore ground 
                       "a"                        "a"                        "a"                        "a"                        "a"                        "a"                        "a" 
     Invertivore sally air   Invertivore sally ground  Invertivore sally surface         Nectarivore aerial          Nectarivore glean                   Omnivore           Scavenger ground 
                       "a"                        "a"                        "a"                        "a"                        "a"                        "a"                        "a" 
          Vertivore aerial   Vertivore air to surface         Vertivore arboreal           Vertivore ground            Vertivore perch 
                       "a"                        "a"                        "a"                        "a"                        "a" 
birdlife_diff_data2 <- with.tukey.label.as.group(birdlife_increase_foraging_niche_tukey, birdlife_diff_data, c("foraging_niche" = "category"))
ggplot(birdlife_diff_data2, aes(x = increase, y = foraging_niche, color = group)) + geom_boxplot() + theme_bw()

library(ggpubr)
pc_axis_figure <- function(dataset) {
  annotate_figure(
ggarrange(
  ggplot(dataset, aes(x = pc1, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc2, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc3, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc4, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  nrow = 2, ncol = 2, common.legend = T, label.x = 0),
left = text_grob("log(response)", rot = 90))
}
pc_axis_figure(merlin_species_data)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).

library(lme4)
merlin_pc1_model = lmer(response ~ pc1 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc1_model)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
pc1               1 0.00042 0.00042  0.0169
percentile        1 1.50774 1.50774 59.9817
pc1:percentile    1 0.06215 0.06215  2.4724
summary(merlin_pc1_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc1 * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: -8.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.4461 -0.5197  0.0155  0.1840  3.3433 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04729  0.2175  
 Residual               0.02514  0.1585  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                    Estimate Std. Error t value
(Intercept)         0.117739   0.022322   5.275
pc1                -0.005011   0.006481  -0.773
percentile0.75      0.086293   0.018597   4.640
pc1:percentile0.75  0.008490   0.005399   1.572

Correlation of Fixed Effects:
            (Intr) pc1    pr0.75
pc1         -0.676              
percntl0.75 -0.417  0.282       
pc1:prc0.75  0.282 -0.417 -0.676
merlin_pc2_model = lmer(response ~ pc2 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc2_model)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
pc2               1 0.01486 0.01486  0.5864
percentile        1 1.50774 1.50774 59.5009
pc2:percentile    1 0.00812 0.00812  0.3203
summary(merlin_pc2_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc2 * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: -11.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.3691 -0.5150  0.0879  0.1354  3.3292 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04706  0.2169  
 Residual               0.02534  0.1592  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                    Estimate Std. Error t value
(Intercept)         0.104389   0.016836   6.200
pc2                -0.008532   0.018597  -0.459
percentile0.75      0.104347   0.014086   7.408
pc2:percentile0.75 -0.008805   0.015559  -0.566

Correlation of Fixed Effects:
            (Intr) pc2    pr0.75
pc2          0.217              
percntl0.75 -0.418 -0.091       
pc2:prc0.75 -0.091 -0.418  0.217
merlin_pc3_model = lmer(response ~ pc3 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc3_model)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
pc3               1 0.00044 0.00044  0.0187
percentile        1 1.50774 1.50774 63.7963
pc3:percentile    1 0.46195 0.46195 19.5464
summary(merlin_pc3_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc3 * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: -30.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.8609 -0.4057 -0.0338  0.2719  3.7473 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04804  0.2192  
 Residual               0.02363  0.1537  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                   Estimate Std. Error t value
(Intercept)         0.10758    0.01638   6.568
pc3                -0.04333    0.02594  -1.670
percentile0.75      0.10282    0.01330   7.730
pc3:percentile0.75  0.09314    0.02107   4.421

Correlation of Fixed Effects:
            (Intr) pc3    pr0.75
pc3         -0.055              
percntl0.75 -0.406  0.022       
pc3:prc0.75  0.022 -0.406 -0.055
merlin_pc4_model = lmer(response ~ pc4 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc4_model)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
pc4               1 0.06576 0.06576  2.6061
percentile        1 1.50774 1.50774 59.7558
pc4:percentile    1 0.03687 0.03687  1.4612
summary(merlin_pc4_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc4 * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: -17

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.4101 -0.5034  0.0731  0.1400  3.4938 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04666  0.2160  
 Residual               0.02523  0.1588  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                   Estimate Std. Error t value
(Intercept)         0.10678    0.01640   6.513
pc4                 0.03400    0.03543   0.960
percentile0.75      0.10683    0.01374   7.777
pc4:percentile0.75  0.03588    0.02969   1.209

Correlation of Fixed Effects:
            (Intr) pc4    pr0.75
pc4          0.046              
percntl0.75 -0.419 -0.019       
pc4:prc0.75 -0.019 -0.419  0.046
pc_axis_figure(birdlife_species_data)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).

ggplot(merlin_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()

merlin_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova)

Error: niche_name
              Df Sum Sq Mean Sq F value Pr(>F)  
is_nocturnal   1  0.462  0.4615   3.912  0.049 *
Residuals    266 31.384  0.1180                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.508  1.5077   59.65 2.29e-13 ***
Residuals  267  6.749  0.0253                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova_i)

Error: niche_name
              Df Sum Sq Mean Sq F value Pr(>F)  
is_nocturnal   1  0.462  0.4615   3.912  0.049 *
Residuals    266 31.384  0.1180                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Error: Within
                         Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                1  1.508  1.5077  60.685 1.51e-13 ***
is_nocturnal:percentile   1  0.140  0.1396   5.618   0.0185 *  
Residuals               266  6.609  0.0248                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
ggplot(birdlife_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()

birdlife_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova)

Error: niche_name
              Df Sum Sq Mean Sq F value Pr(>F)
is_nocturnal   1   0.20  0.1982   1.658  0.199
Residuals    266  31.79  0.1195               

Error: Within
            Df Sum Sq Mean Sq F value   Pr(>F)    
percentile   1  1.327  1.3274   60.18 1.84e-13 ***
Residuals  267  5.890  0.0221                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova_i)

Error: niche_name
              Df Sum Sq Mean Sq F value Pr(>F)
is_nocturnal   1   0.20  0.1982   1.658  0.199
Residuals    266  31.79  0.1195               

Error: Within
                         Df Sum Sq Mean Sq F value   Pr(>F)    
percentile                1  1.327  1.3274  60.370 1.72e-13 ***
is_nocturnal:percentile   1  0.041  0.0407   1.852    0.175    
Residuals               266  5.849  0.0220                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
ggplot(merlin_species_data, aes(x = total_species, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw()
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).

merlin_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_species_count_model)
Analysis of Variance Table
                         npar  Sum Sq Mean Sq F value
total_species               1 0.00052 0.00052  0.0206
percentile                  1 1.50774 1.50774 59.4316
total_species:percentile    1 0.00026 0.00026  0.0103
summary(merlin_species_count_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ total_species * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: 8.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.3523 -0.5462  0.1109  0.1216  3.2582 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04717  0.2172  
 Residual               0.02537  0.1593  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                               Estimate Std. Error t value
(Intercept)                   1.064e-01  1.690e-02   6.295
total_species                -1.420e-05  1.612e-04  -0.088
percentile0.75                1.064e-01  1.414e-02   7.527
total_species:percentile0.75 -1.367e-05  1.348e-04  -0.101

Correlation of Fixed Effects:
            (Intr) ttl_sp pr0.75
total_specs -0.229              
percntl0.75 -0.418  0.096       
ttl_sp:0.75  0.096 -0.418 -0.229
birdlife_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=birdlife_species_data)
anova(birdlife_species_count_model)
Analysis of Variance Table
                         npar Sum Sq Mean Sq F value
total_species               1 0.0000  0.0000  0.0001
percentile                  1 1.3274  1.3274 59.9531
total_species:percentile    1 0.0000  0.0000  0.0001
summary(birdlife_species_count_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ total_species * percentile + (1 | niche_name)
   Data: birdlife_species_data

REML criterion at convergence: -26.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.5919 -0.5189  0.0633  0.1497  3.4600 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.04906  0.2215  
 Residual               0.02214  0.1488  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                              Estimate Std. Error t value
(Intercept)                  9.946e-02  1.675e-02   5.939
total_species                9.392e-07  1.597e-04   0.006
percentile0.75               9.949e-02  1.321e-02   7.534
total_species:percentile0.75 1.518e-06  1.260e-04   0.012

Correlation of Fixed Effects:
            (Intr) ttl_sp pr0.75
total_specs -0.229              
percntl0.75 -0.394  0.090       
ttl_sp:0.75  0.090 -0.394 -0.229
merlin_species_data$present <- merlin_species_data$response > 0
ggplot(merlin_species_data, aes(x = log(total_species), y = present, color = percentile)) + geom_boxplot() + theme_bw()

merlin_present_model = lmer(present ~ log(total_species) * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_present_model)
Analysis of Variance Table
                              npar Sum Sq Mean Sq  F value
log(total_species)               1 7.6804  7.6804 145.5124
percentile                       1 1.9104  1.9104  36.1952
log(total_species):percentile    1 0.0496  0.0496   0.9393
summary(merlin_present_model)
Linear mixed model fit by REML ['lmerMod']
Formula: present ~ log(total_species) * percentile + (1 | niche_name)
   Data: merlin_species_data

REML criterion at convergence: 410.7

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.25242 -0.44048  0.02734  0.24296  2.14911 

Random effects:
 Groups     Name        Variance Std.Dev.
 niche_name (Intercept) 0.11259  0.3356  
 Residual               0.05278  0.2297  
Number of obs: 536, groups:  niche_name, 268

Fixed effects:
                                  Estimate Std. Error t value
(Intercept)                        0.10521    0.03325   3.164
log(total_species)                 0.19281    0.01685  11.446
percentile0.75                     0.13652    0.02657   5.139
log(total_species):percentile0.75 -0.01304    0.01346  -0.969

Correlation of Fixed Effects:
            (Intr) lg(t_) pr0.75
lg(ttl_spc) -0.665              
percntl0.75 -0.399  0.266       
lg(t_):0.75  0.266 -0.399 -0.665
---
title: "R Notebook"
output: html_notebook
---
```{r}
library(stringr)
library(tidyverse)
library(bigrquery)
library(MuMIn)
```

```{r}
response <- try(system('~/google-cloud-sdk/bin/gcloud projects list --quiet', intern = T))
projectid <- strsplit(response[2], " ")[[1]][1]
```

```{r}
options(na.action = "na.fail") 
```

```{r}
source("./dredge_functions.R")
```

```{r}
dredge_and_subset <- function(data) {
  model <- lm(
    response ~ foraging_niche + trophic_niche + is_nocturnal + pc1 + pc2 + pc3 + pc4 + total_species,
    data=data
  )
  dredge_result <- dredge(model)
  summary(model.avg(dredge_result, subset = delta < 10))
}
```

```{r}
load_niche_data <- function() {
  filename <- 'species_analysis_cache.csv'
  
  if (!file.exists(filename)) {
    
    sql <- "
        SELECT 
          niche_name,
          foraging_niche, 
          trophic_niche, 
          is_noctural, 
          total_species,
          merlin_10_perc_ratio, 
          merlin_20_perc_ratio, 
          birdlife_10_perc_ratio, 
          birdlife_20_perc_ratio, 
          either_10_perc_ratio, 
          either_20_perc_ratio, 
          both_10_perc_ratio, 
          both_20_perc_ratio, 
          body_morphspace.pc1.mean AS pc1, 
          body_morphspace.pc2.mean AS pc2, 
          body_morphspace.pc3.mean AS pc3, 
          body_morphspace.pc4.mean AS pc4
        FROM `endless-matter-297214.model2.niche_analysis_model` LIMIT 1000 
    "
  
    tb <- bq_project_query(projectid, sql)

    data <- bq_table_download(tb)
    write_csv(data, filename)
  }
  
  data <- read_csv(filename)
  
  data[is.na(data$foraging_niche),]$foraging_niche <- 'Omnivore'
  
  data$foraging_niche = as.factor(data$foraging_niche)
  data$trophic_niche = as.factor(data$trophic_niche)
  data$is_nocturnal = as.factor(data$is_noctural)
  
  data
}

data_for_response <- function(column_name_for_response) {
  data <- load_niche_data()
  names(data)[names(data) == column_name_for_response] <- "response"
  
  data[,c("niche_name", "response", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}
```

---------------------------------
0.25 Percentile - 10% of species
---------------------------------

```{r}
merlin_10_data <- data_for_response('merlin_10_perc_ratio')
merlin_10_data
```


```{r}
merlin_10_result <- dredge_and_subset(merlin_10_data)
merlin_10_summ <- model_summary('merlin_10', 'species', merlin_10_result)
merlin_10_summ
```

```{r}
birdlife_10_data <- data_for_response('birdlife_10_perc_ratio')
birdlife_10_result <- dredge_and_subset(birdlife_10_data)
birdlife_10_summ <- model_summary('birdlife_10', 'species', birdlife_10_result)
birdlife_10_summ
```


```{r}
both_10_data <- data_for_response('both_10_perc_ratio')
both_10_result <- dredge_and_subset(both_10_data)
both_10_summ <- model_summary('both_10', 'species', both_10_result)
both_10_summ
```


```{r}
either_10_data <- data_for_response('either_10_perc_ratio')
either_10_result <- dredge_and_subset(either_10_data)
either_10_summ <- model_summary('either_10', 'species', either_10_result)
either_10_summ
```

------------------------------
Full result for 10% of species
------------------------------
```{r}
all_species_results <- full_join(full_join(merlin_10_summ, birdlife_10_summ), full_join(both_10_summ, either_10_summ))
write_csv(all_species_results, "species_analysis_result_10_perc.csv")
```

---------------------------------
0.75 Percentile - 20% of species
---------------------------------

```{r}
merlin_20_data <- data_for_response('merlin_20_perc_ratio')
merlin_20_result <- dredge_and_subset(merlin_20_data)
merlin_20_summ <- model_summary('merlin_20', 'species', merlin_20_result)
merlin_20_summ
```

```{r}
birdlife_20_data <- data_for_response('birdlife_20_perc_ratio')
birdlife_20_result <- dredge_and_subset(birdlife_20_data)
birdlife_20_summ <- model_summary('birdlife_20', 'species', birdlife_20_result)
birdlife_20_summ
```

```{r}
both_20_data <- data_for_response('both_20_perc_ratio')
both_20_result <- dredge_and_subset(both_20_data)
both_20_summ <- model_summary('both_20', 'species', both_20_result)
both_20_summ
```

```{r}
either_20_data <- data_for_response('either_20_perc_ratio')
either_20_result <- dredge_and_subset(either_20_data)
either_20_summ <- model_summary('either_20', 'species', either_20_result)
either_20_summ
```

------------------------------
Full result for 10% of species
------------------------------
```{r}
all_species_results <- full_join(full_join(merlin_20_summ, birdlife_20_summ), full_join(both_20_summ, either_20_summ))
write_csv(all_species_results, "species_analysis_result_20_perc.csv")
```

------------------------------
Comparing the 2 percentiles
------------------------------

```{r}
ggplot(merlin_10_data, aes(x = response)) + geom_bar(width = 0.1)
```

```{r}
bind_sets <- function(first_percentile, second_percentile) {
  first_percentile$percentile <- "0.25"
  second_percentile$percentile <- "0.75"
  rbind(first_percentile, second_percentile)
}

merlin_species_data <- bind_sets(merlin_10_data,  merlin_20_data)
birdlife_species_data <- bind_sets(birdlife_10_data,  birdlife_20_data)
either_species_data <- bind_sets(either_10_data,  either_20_data)
both_species_data <- bind_sets(both_10_data,  both_20_data)
```

```{r}
diff_set <- function(first_percentile, second_percentile) {
  second_percentile$response_20 <- second_percentile$response
  
  result <- right_join(first_percentile, second_percentile[,c("niche_name", "response_20")], by = c("niche_name"))
  result$diff <- result$response_20 - result$response
  result$increase <- result$diff / result$response_20
  result$increase[is.na(result$increase)] = 0
  result$response_10 <- result$response
  result[,c("response_10", "response_20", "diff", "increase", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}

merlin_diff_data <- diff_set(merlin_10_data,  merlin_20_data)
birdlife_diff_data <- diff_set(birdlife_10_data,  birdlife_20_data)
either_diff_data <- diff_set(either_10_data,  either_20_data)
both_diff_data <- diff_set(both_10_data,  both_20_data)

merlin_diff_data
```

```{r}
ggplot(merlin_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
merlin_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova)

merlin_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova_i)
```

```{r}
pairwise.wilcox.test(merlin_diff_data$increase, merlin_diff_data$trophic_niche)
```
```{r}
library(multcomp)
merlin_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=merlin_diff_data)
summary(merlin_increase_tropic_niche_anova)

merlin_increase_tropic_niche_tukey <- cld(glht(merlin_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))

merlin_increase_tropic_niche_tukey
```

```{r}
plot(merlin_increase_tropic_niche_tukey)
```
```{r}
with.tukey.label.as.group <- function(tukey, dataset, join_by) {
  labels <- data.frame(tukey$mcletters$Letters)
  labels$category <- rownames(labels)
  colnames(labels) <- c("group", "category")
  
  
  left_join(dataset, labels, by = join_by)
}
```

```{r}
merlin_diff_data1 <- with.tukey.label.as.group(merlin_increase_tropic_niche_tukey, merlin_diff_data, c("trophic_niche" = "category"))
merlin_diff_data1
```

```{r}
ggplot(merlin_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()
```

```{r}
ggplot(birdlife_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
birdlife_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova)

birdlife_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova_i)
```

```{r}
birdlife_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=birdlife_diff_data)
summary(birdlife_increase_tropic_niche_anova)

birdlife_increase_tropic_niche_tukey <- cld(glht(birdlife_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))

birdlife_increase_tropic_niche_tukey
```

```{r}
birdlife_diff_data1 <- with.tukey.label.as.group(birdlife_increase_tropic_niche_tukey, birdlife_diff_data, c("trophic_niche" = "category"))
ggplot(birdlife_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()
```

```{r}
ggplot(either_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
ggplot(both_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
nrow(merlin_10_data[merlin_10_data$response > 0,])
nrow(merlin_20_data[merlin_20_data$response > 0,])
```

```{r}
nrow(birdlife_10_data[birdlife_10_data$response > 0,])
nrow(birdlife_20_data[birdlife_20_data$response > 0,])
```

```{r}
nrow(either_10_data[either_10_data$response > 0,])
nrow(either_20_data[either_20_data$response > 0,])
```

```{r}
nrow(both_10_data[both_10_data$response > 0,])
nrow(both_20_data[both_20_data$response > 0,])
```

```{r}
ggplot(merlin_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
interaction.plot(merlin_species_data$foraging_niche, merlin_species_data$percentile, merlin_species_data$response)
```

```{r}
merlin_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova)

merlin_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova_i)
```

```{r}
merlin_increase_foraging_niche_anova <-aov(increase~foraging_niche, data=merlin_diff_data)
summary(merlin_increase_foraging_niche_anova)

merlin_increase_foraging_niche_tukey <- cld(glht(merlin_increase_foraging_niche_anova, linfct=mcp(foraging_niche="Tukey")))

merlin_increase_foraging_niche_tukey
```

```{r}
merlin_diff_data2 <- with.tukey.label.as.group(merlin_increase_foraging_niche_tukey, merlin_diff_data, c("foraging_niche" = "category"))
ggplot(merlin_diff_data2, aes(x = increase, y = foraging_niche, color = group)) + geom_boxplot() + theme_bw()
```


```{r}
ggplot(birdlife_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
birdlife_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova)

birdlife_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova_i)
```

```{r}
birdlife_increase_foraging_niche_anova <-aov(increase~foraging_niche, data=birdlife_diff_data)
summary(birdlife_increase_foraging_niche_anova)

birdlife_increase_foraging_niche_tukey <- cld(glht(birdlife_increase_foraging_niche_anova, linfct=mcp(foraging_niche="Tukey")))

birdlife_increase_foraging_niche_tukey
```

```{r}
birdlife_diff_data2 <- with.tukey.label.as.group(birdlife_increase_foraging_niche_tukey, birdlife_diff_data, c("foraging_niche" = "category"))
ggplot(birdlife_diff_data2, aes(x = increase, y = foraging_niche, color = group)) + geom_boxplot() + theme_bw()
```

```{r}
library(ggpubr)
```

```{r}
pc_axis_figure <- function(dataset) {
  annotate_figure(
ggarrange(
  ggplot(dataset, aes(x = pc1, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc2, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc3, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc4, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  nrow = 2, ncol = 2, common.legend = T, label.x = 0),
left = text_grob("log(response)", rot = 90))
}
```

```{r}
pc_axis_figure(merlin_species_data)
```

```{r}
library(lme4)
merlin_pc1_model = lmer(response ~ pc1 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc1_model)
summary(merlin_pc1_model)
```

```{r}
merlin_pc2_model = lmer(response ~ pc2 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc2_model)
summary(merlin_pc2_model)
```

```{r}
merlin_pc3_model = lmer(response ~ pc3 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc3_model)
summary(merlin_pc3_model)
```

```{r}
merlin_pc4_model = lmer(response ~ pc4 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc4_model)
summary(merlin_pc4_model)
```

```{r}
pc_axis_figure(birdlife_species_data)
```

```{r}
ggplot(merlin_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
merlin_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova)

merlin_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova_i)
```

```{r}
ggplot(birdlife_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
birdlife_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova)

birdlife_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova_i)
```

```{r}
ggplot(merlin_species_data, aes(x = total_species, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw()
```

```{r}
merlin_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_species_count_model)
summary(merlin_species_count_model)
```

```{r}
birdlife_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=birdlife_species_data)
anova(birdlife_species_count_model)
summary(birdlife_species_count_model)
```

```{r}
merlin_species_data$present <- merlin_species_data$response > 0
```

```{r}
ggplot(merlin_species_data, aes(x = log(total_species), y = present, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
merlin_present_model = lmer(present ~ log(total_species) * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_present_model)
summary(merlin_present_model)
```




